ggplot으로 그래프 그리기

데이터

library(ggplot2)
# Print the midwest tibble to preview its rows and column types.
midwest
## # A tibble: 437 x 28
##      PID county state  area poptotal popdensity popwhite popblack popamerindian
##    <int> <chr>  <chr> <dbl>    <int>      <dbl>    <int>    <int>         <int>
##  1   561 ADAMS  IL    0.052    66090      1271.    63917     1702            98
##  2   562 ALEXA… IL    0.014    10626       759      7054     3496            19
##  3   563 BOND   IL    0.022    14991       681.    14477      429            35
##  4   564 BOONE  IL    0.017    30806      1812.    29344      127            46
##  5   565 BROWN  IL    0.018     5836       324.     5264      547            14
##  6   566 BUREAU IL    0.05     35688       714.    35157       50            65
##  7   567 CALHO… IL    0.017     5322       313.     5298        1             8
##  8   568 CARRO… IL    0.027    16805       622.    16519      111            30
##  9   569 CASS   IL    0.024    13437       560.    13384       16             8
## 10   570 CHAMP… IL    0.058   173025      2983.   146506    16559           331
## # … with 427 more rows, and 19 more variables: popasian <int>, popother <int>,
## #   percwhite <dbl>, percblack <dbl>, percamerindan <dbl>, percasian <dbl>,
## #   percother <dbl>, popadults <int>, perchsd <dbl>, percollege <dbl>,
## #   percprof <dbl>, poppovertyknown <int>, percpovertyknown <dbl>,
## #   percbelowpoverty <dbl>, percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## #   percelderlypoverty <dbl>, inmetro <int>, category <chr>

점 그래프

library(ggplot2)
# Scatter plot of poptotal by county for the first ten rows of midwest.
ggplot(data = midwest[1:10, ], mapping = aes(x = county, y = poptotal)) +
  geom_point()

색상 수정

county 별로 다른 색상

library(ggplot2)
# Same scatter plot, with one colour per county via the colour aesthetic.
ggplot(
  data = midwest[1:10, ],
  mapping = aes(x = county, y = poptotal, color = county)
) +
  geom_point()

점 크기 수정

library(ggplot2)
# size is set outside aes(), so every point gets the same fixed size.
ggplot(
  data = midwest[1:10, ],
  mapping = aes(x = county, y = poptotal, color = county)
) +
  geom_point(size = 7)

선 그래프

library(ggplot2)
# Line plot of poptotal against area over all rows of midwest.
ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_line()

막대 그래프

library(ggplot2)
# Bar chart whose bar heights are the poptotal values themselves;
# geom_col() is the identity-stat form of geom_bar().
ggplot(data = midwest[1:10, ], mapping = aes(x = county, y = poptotal)) +
  geom_col()

색상 추가

county별 색상 추가

library(ggplot2)
# Bar chart with one fill colour per county.
ggplot(
  data = midwest[1:10, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col()

값 추가

library(ggplot2)
# Bar chart of the first five rows with the poptotal value printed in white
# on each bar; vjust = 1.5 shifts the label down into the bar.
ggplot(
  data = midwest[1:5, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col() +
  geom_text(mapping = aes(label = poptotal), vjust = 1.5, color = "white")

막대 폭 변경

library(ggplot2)
# Bar chart with narrow bars (width = 0.2).
ggplot(
  data = midwest[1:10, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col(width = 0.2)

state별로 poptotal 합계

state 그룹별 합계를 구한 후 그래프에 반영하기

library(tidyverse)
library(ggplot2)
# Sum poptotal within each state, then draw one bar per state.
midwest %>%
  group_by(state) %>%
  summarise(all = sum(poptotal)) %>%
  ggplot(mapping = aes(x = state, y = all, fill = state)) +
  geom_col()

범례 legend

위치 변경

library(ggplot2)
# Labelled bar chart with the legend placed above the plot.
ggplot(
  data = midwest[1:5, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col() +
  geom_text(mapping = aes(label = poptotal), vjust = 1.5, color = "white") +
  theme(legend.position = "top")

아래

library(ggplot2)
# Labelled bar chart with the legend placed below the plot.
ggplot(
  data = midwest[1:5, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col() +
  geom_text(mapping = aes(label = poptotal), vjust = 1.5, color = "white") +
  theme(legend.position = "bottom")

폰트 변경

파란색, bold체로 변경

library(ggplot2)
# Labelled bar chart of the first three rows; the legend sits at the bottom
# and its entry labels are drawn in bold blue text.
ggplot(
  data = midwest[1:3, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col() +
  geom_text(mapping = aes(label = poptotal), vjust = 1.5, color = "white") +
  theme(
    legend.position = "bottom",
    legend.text = element_text(colour = "blue", face = "bold")
  )

x축 항목 순서 변경

library(ggplot2)
# Labelled bar chart whose x-axis categories are drawn in the explicit order
# given to scale_x_discrete(limits = ...).
ggplot(
  data = midwest[1:3, ],
  mapping = aes(x = county, y = poptotal, fill = county)
) +
  geom_col() +
  geom_text(mapping = aes(label = poptotal), vjust = 1.5, color = "white") +
  theme(legend.position = "bottom") +
  scale_x_discrete(limits = c("BOND", "ADAMS", "ALEXANDER"))

제거

library(ggplot2)
# Labelled bar chart with the legend removed.
# legend.position must be "none" to hide the legend; any other spelling is
# not a recognised position.
ggplot(midwest[1:3, ], aes(x = county, y = poptotal, fill = county)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = poptotal), vjust = 1.5, color = "white") +
  theme(legend.position = "none")

축 수정

최대, 최소값

#library(ggplot2)
# Scatter plot restricted to area in [0, 0.1] and poptotal in [0, 1000000];
# rows outside these limits are dropped from the plot with a warning.
ggplot(data = midwest, mapping = aes(x = area, y = poptotal)) +
  geom_point() +
  xlim(0, 0.1) +
  ylim(0, 1000000)
## Warning: Removed 5 rows containing missing values (geom_point).

이름 Title

제목, x축, y축

library(ggplot2)
# Bar chart with an explicit plot title and axis titles.
ggplot(data = midwest[1:10, ], mapping = aes(x = county, y = poptotal)) +
  geom_col() +
  labs(
    title = "Population",
    x = "County",
    y = "Population"
  )

이름 중앙 정렬

제목(title)을 가운데 정렬 — x축, y축 이름은 그대로

library(ggplot2)
# Bar chart with titles; hjust = 0.5 centres the plot title horizontally.
ggplot(data = midwest[1:10, ], mapping = aes(x = county, y = poptotal)) +
  geom_col() +
  labs(
    title = "Population",
    x = "County",
    y = "Population"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

x축 텍스트 45도 기울이기

library(ggplot2)
# Scatter plot with the x-axis tick labels rotated 45 degrees.
ggplot(
  data = midwest[1:10, ],
  mapping = aes(x = county, y = poptotal, color = county)
) +
  geom_point(size = 7) +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

x축 int를 factor로 변경

# Sum poptotal per inmetro value and draw one bar per value.
# inmetro is an integer column, so it is wrapped in factor() for the x
# aesthetic as well as for fill; otherwise the x axis stays continuous.
midwest %>%
  group_by(inmetro) %>%
  summarize(pop = sum(poptotal)) %>%
  ggplot(aes(x = factor(inmetro), y = pop, fill = factor(inmetro))) +
  geom_bar(stat = "identity")
## `summarise()` ungrouping output (override with `.groups` argument)

회전

x축과 y축을 서로 바꿔 가로 막대 그래프로 표시 (coord_flip)

library(ggplot2)
# Bar chart with the x and y axes swapped, giving horizontal bars.
ggplot(data = midwest[1:10, ], mapping = aes(x = county, y = poptotal)) +
  geom_col() +
  coord_flip()

다중 선 그래프

pivot_longer로 변환해서 그림

# Print the economics tibble: one date column plus five numeric series.
economics
## # A tibble: 574 x 6
##    date         pce    pop psavert uempmed unemploy
##    <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
##  1 1967-07-01  507. 198712    12.6     4.5     2944
##  2 1967-08-01  510. 198911    12.6     4.7     2945
##  3 1967-09-01  516. 199113    11.9     4.6     2958
##  4 1967-10-01  512. 199311    12.9     4.9     3143
##  5 1967-11-01  517. 199498    12.8     4.7     3066
##  6 1967-12-01  525. 199657    11.8     4.8     3018
##  7 1968-01-01  531. 199808    11.7     5.1     2878
##  8 1968-02-01  534. 199920    12.3     4.5     3001
##  9 1968-03-01  544. 200056    11.7     4.1     2877
## 10 1968-04-01  544  200208    12.3     4.6     2709
## # … with 564 more rows
# Reshape to long form: every column except date becomes a (type, num) row.
economics %>%
  pivot_longer(
    cols = -date,
    names_to = "type",
    values_to = "num"
  )
## # A tibble: 2,870 x 3
##    date       type          num
##    <date>     <chr>       <dbl>
##  1 1967-07-01 pce         507. 
##  2 1967-07-01 pop      198712  
##  3 1967-07-01 psavert      12.6
##  4 1967-07-01 uempmed       4.5
##  5 1967-07-01 unemploy   2944  
##  6 1967-08-01 pce         510. 
##  7 1967-08-01 pop      198911  
##  8 1967-08-01 psavert      12.6
##  9 1967-08-01 uempmed       4.7
## 10 1967-08-01 unemploy   2945  
## # … with 2,860 more rows
# Reshape economics to long form, then draw one coloured line per series.
economics %>%
  pivot_longer(
    cols = -date,
    names_to = "type",
    values_to = "num"
  ) %>%
  ggplot(mapping = aes(x = date, y = num, color = type)) +
  geom_line()

히스토그램

area의 히스토그램

library(ggplot2)
# Histogram of area using the default binning.
# area is not mapped to fill: stat_bin() aggregates rows into bins, so a
# per-row continuous fill cannot be carried over to the bars.
ggplot(midwest, aes(x = area)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

그룹별 별도 그래프

점 그래프, state 별로 분리, 세로

library(ggplot2)
# Scatter plot split into one panel per state, stacked as rows.
ggplot(
  data = midwest,
  mapping = aes(x = area, y = poptotal, color = area)
) +
  geom_point() +
  facet_grid(rows = vars(state))

점 그래프, state 별로 분리, 가로

library(ggplot2)
# Scatter plot split into one panel per state, laid out as columns.
ggplot(
  data = midwest,
  mapping = aes(x = area, y = poptotal, color = area)
) +
  geom_point() +
  facet_grid(cols = vars(state))

y축 척도 다양화

library(ggplot2)
# One bar panel per series in a single column; scales = "free" lets each
# panel choose its own axis ranges.
economics %>%
  pivot_longer(
    cols = -date,
    names_to = "type",
    values_to = "num"
  ) %>%
  ggplot(mapping = aes(x = date, y = num, fill = type)) +
  geom_col() +
  facet_wrap(~type, ncol = 1, scales = "free")

Heat map

library(ggplot2)
# 2D binned heat map of popamerindian against area, 10 bins per axis.
ggplot(data = midwest, mapping = aes(x = area, y = popamerindian)) +
  geom_bin2d(bins = 10)

산포도

library(ggplot2)
# Scatter plot of poptotal by county for the first ten rows of midwest.
ggplot(data = midwest[1:10, ], mapping = aes(x = county, y = poptotal)) +
  geom_point()

다중 산포도(산점도 행렬)를 그릴 경우에는 plot 함수가 편리하다

library(ggplot2)
# Base-graphics plot of columns 4 through 10 of midwest.
plot(x = midwest[, 4:10])

추세도

회귀선 추가

# Scatter plot of mpg against wt with a fitted linear-model line on top.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'